--- title: Data augmentation in computer vision keywords: fastai sidebar: home_sidebar summary: "Transforms to apply data augmentation in Computer Vision" description: "Transforms to apply data augmentation in Computer Vision" ---
{% raw %}
from nbdev.showdoc import *
# Load the test image and resize it to 600x400; `img` is the shared input for the examples on this page.
img = PILImage(PILImage.create(TEST_IMAGE).resize((600,400)))

General architecture

class RandTransform[source]

RandTransform(p=1.0, nm=None, before_call=None, **kwargs) :: Transform

A transform that randomizes its state at each __call__

As for all Transform you can pass encodes and decodes at init or subclass and implement them. You can do the same for the before_call method that is called at each __call__. Note that to have a consistent state for inputs and targets, a RandTransform must be applied at the tuple level.

By default the before_call behavior is to execute the transform with probability p (if subclassing and wanting to tweak that behavior, the attribute self.do, if it exists, is looked for to decide if the transform is executed or not). {% include note.html content='A RandTransform is only applied to the training set by default, so you have to pass split_idx=0 if you are calling it directly and not through a Datasets. That behavior can be changed by setting the attr split_idx of the transform to None.' %}

RandTransform.before_call[source]

RandTransform.before_call(b, split_idx)

Randomize the state for input b

def _add1(x):
    "Toy encoder used for the check below: shift the input by one"
    return x+1

# With p=0.5 the transform should be applied on some calls and skipped on others
dumb_tfm = RandTransform(enc=_add1, p=0.5)
start = 2
saw_applied = saw_skipped = False
for _ in range(40):
    t = dumb_tfm(start, split_idx=0)
    applied = dumb_tfm.do
    # `do` records whether this particular call ran the encoder
    test_eq(t, start+1 if applied else start)
    if applied: saw_applied = True
    else:       saw_skipped = True
# Both outcomes must have been observed over the 40 calls
assert saw_applied and saw_skipped

Item transforms

Image.flip_lr[source]

Image.flip_lr(x:Image)

TensorImageBase.flip_lr[source]

TensorImageBase.flip_lr(x:TensorImageBase)

TensorPoint.flip_lr[source]

TensorPoint.flip_lr(x:TensorPoint)

TensorBBox.flip_lr[source]

TensorBBox.flip_lr(x:TensorBBox)

# Show the test image side by side with the output of its flip_lr() method
_,axs = subplots(1,2)
show_image(img, ctx=axs[0], title='original')
show_image(img.flip_lr(), ctx=axs[1], title='flipped');

class FlipItem[source]

FlipItem(p=0.5) :: RandTransform

Randomly flip with probability p

# With p=1. the flip is applied on every call; split_idx=0 asks for training-set behavior.
# NOTE(review): `bbox` is not defined anywhere above this cell in the visible page —
# presumably it comes from a hidden cell; confirm before running this cell on its own.
tflip = FlipItem(p=1.)
test_eq(tflip(bbox,split_idx=0), tensor([[1.,0., 0.,1]]) -1)

PILImage.dihedral[source]

PILImage.dihedral(x:PILImage, k)

TensorImage.dihedral[source]

TensorImage.dihedral(x:TensorImage, k)

TensorPoint.dihedral[source]

TensorPoint.dihedral(x:TensorPoint, k)

TensorBBox.dihedral[source]

TensorBBox.dihedral(x:TensorBBox, k)

By default each of the 8 dihedral transformations (including noop) has the same probability of being picked when the transform is applied. You can customize this behavior by passing your own draw function. To force a specific flip, you can also pass an integer between 0 and 7.

class DihedralItem[source]

DihedralItem(p=0.5) :: RandTransform

Randomly flip with probability p

# Fill a 2x4 grid with eight samples; a fresh DihedralItem(p=1.) is built and applied for every axis
_,axs = subplots(2, 4)
for ax in axs.flatten():
    show_image(DihedralItem(p=1.)(img, split_idx=0), ctx=ax)

Resize with crop, pad or squish

class PadMode[source]

PadMode(*args, **kwargs)

All possible padding modes as attributes to get tab-completion and typo-proofing

TensorBBox.crop_pad[source]

TensorBBox.crop_pad(x:(TensorBBox, TensorPoint, Image), sz, tl=None, orig_sz=None, pad_mode='zeros', resize_mode=2, resize_to=None)

TensorPoint.crop_pad[source]

TensorPoint.crop_pad(x:(TensorBBox, TensorPoint, Image), sz, tl=None, orig_sz=None, pad_mode='zeros', resize_mode=2, resize_to=None)

Image.crop_pad[source]

Image.crop_pad(x:(TensorBBox, TensorPoint, Image), sz, tl=None, orig_sz=None, pad_mode='zeros', resize_mode=2, resize_to=None)

class CropPad[source]

CropPad(size, pad_mode='zeros', **kwargs) :: Transform

Center crop or pad an image to size

# First row: crop_pad the 600x400 test image to three different target sizes
_,axs = plt.subplots(1,3,figsize=(12,4))
for ax,sz in zip(axs.flatten(), [300, 500, 700]):
    show_image(img.crop_pad(sz), ctx=ax, title=f'Size {sz}');
# Second row: same target size (600,700), one panel per padding mode
_,axs = plt.subplots(1,3,figsize=(12,4))
for ax,mode in zip(axs.flatten(), [PadMode.Zeros, PadMode.Border, PadMode.Reflection]):
    show_image(img.crop_pad((600,700), pad_mode=mode), ctx=ax, title=mode);

class RandomCrop[source]

RandomCrop(size, **kwargs) :: RandTransform

Randomly crop an image to size

class OldRandomCrop[source]

OldRandomCrop(size, pad_mode='zeros', enc=None, dec=None, split_idx=None, order=None) :: CropPad

Randomly crop an image to size

# Call the same RandomCrop(200) instance three times on the test image, one result per axis
_,axs = plt.subplots(1,3,figsize=(12,4))
f = RandomCrop(200)
for ax in axs: show_image(f(img), ctx=ax);

On the validation set, we take a center crop.

# Reuse `f` (the RandomCrop(200) created above) with split_idx=1 to request validation-set behavior
_,axs = plt.subplots(1,3,figsize=(12,4))
for ax in axs: show_image(f(img, split_idx=1), ctx=ax);

class ResizeMethod[source]

ResizeMethod(*args, **kwargs)

All possible resize method as attributes to get tab-completion and typo-proofing

# The Squish attribute compares equal to the plain string 'squish'
test_eq(ResizeMethod.Squish, 'squish')

class Resize[source]

Resize(size, method='crop', pad_mode='reflection', resamples=(2, 0), **kwargs) :: RandTransform

A transform that randomizes its state at each __call__

size can be an integer (in which case images will be resized to a square) or a tuple. Depending on the method:

  • with ResizeMethod.Squish, we squish any rectangle to size
  • with ResizeMethod.Pad, we resize so that the shorter dimension is a match and use padding with pad_mode
  • with ResizeMethod.Crop, we resize so that the larger dimension is a match and crop (randomly on the training set, center crop for the validation set)

When doing the resize, we use resamples[0] for images and resamples[1] for segmentation masks.

# One panel per resize method, applied with split_idx=0 (training-set behavior)
_,axs = plt.subplots(1,3,figsize=(12,4))
for ax,method in zip(axs.flatten(), [ResizeMethod.Squish, ResizeMethod.Pad, ResizeMethod.Crop]):
    rsz = Resize(256, method=method)
    show_image(rsz(img, split_idx=0), ctx=ax, title=method);

On the validation set, the crop is always a center crop (on the dimension that's cropped).

# Same three methods as above, this time with split_idx=1 (validation-set behavior)
_,axs = plt.subplots(1,3,figsize=(12,4))
for ax,method in zip(axs.flatten(), [ResizeMethod.Squish, ResizeMethod.Pad, ResizeMethod.Crop]):
    rsz = Resize(256, method=method)
    show_image(rsz(img, split_idx=1), ctx=ax, title=method);

class RandomResizedCrop[source]

RandomResizedCrop(size, min_scale=0.08, ratio=(0.75, 1.3333333333333333), resamples=(2, 0), val_xtra=0.14, **kwargs) :: RandTransform

Picks a random scaled crop of an image and resize it to size

The crop is picked with a random scale in the range (min_scale,1) and a ratio in the range passed, then the resize is done with resamples[0] for images and resamples[1] for segmentation masks. On the validation set, we center crop the image if its ratio isn't in the range (to the minimum or maximum value) then resize.

# Draw nine crops from the same RandomResizedCrop(256) instance.
# `cropped` keeps the last result and is checked in a later cell.
crop = RandomResizedCrop(256)
_,axs = plt.subplots(3,3,figsize=(9,9))
for ax in axs.flatten():
    cropped = crop(img)
    show_image(cropped, ctx=ax);

Squish is used on the validation set, removing val_xtra proportion of each side first.

# Validation-set behavior (split_idx=1), shown three times with the `crop` transform defined earlier
_,axs = subplots(1,3)
for ax in axs.flatten(): show_image(crop(img, split_idx=1), ctx=ax);
# NOTE: `cropped` is the last training-mode result from the previous cell, not one of the images shown here
test_eq(cropped.shape, [256,256])

class RatioResize[source]

RatioResize(max_sz, resamples=(2, 0)) :: Transform

Resizes the biggest dimension of an image to max_sz maintaining the aspect ratio

# Bare expression so the notebook displays the resized image
RatioResize(256)(img)
# For the 600x400 test image the first entry of .size ends up at max_sz
test_eq(RatioResize(256)(img).size[0], 256)
# After dihedral(3) it is the second entry of .size that ends up at max_sz
# (presumably because that transformation swaps width and height — confirm)
test_eq(RatioResize(256)(img.dihedral(3)).size[1], 256)

Affine and coord tfm on the GPU

# Tensor version of `img`: axes permuted with (2,0,1), cast to float and divided by 255
timg = TensorImage(array(img)).permute(2,0,1).float()/255.

def _batch_ex(bs):
    "Return a `TensorImage` batch made of `bs` cloned copies of `timg`"
    repeated = timg[None].expand(bs, *timg.shape)
    return TensorImage(repeated.clone())

TensorImage.affine_coord[source]

TensorImage.affine_coord(x:TensorImage, mat=None, coord_tfm=None, sz=None, mode='bilinear', pad_mode='reflection', align_corners=True)

TensorMask.affine_coord[source]

TensorMask.affine_coord(x:TensorMask, mat=None, coord_tfm=None, sz=None, mode='nearest', pad_mode='reflection', align_corners=True)

TensorPoint.affine_coord[source]

TensorPoint.affine_coord(x:TensorPoint, mat=None, coord_tfm=None, sz=None, mode='nearest', pad_mode='zeros', align_corners=True)

TensorBBox.affine_coord[source]

TensorBBox.affine_coord(x:TensorBBox, mat=None, coord_tfm=None, sz=None, mode='nearest', pad_mode='zeros', align_corners=True)

class AffineCoordTfm[source]

AffineCoordTfm(aff_fs=None, coord_fs=None, size=None, mode='bilinear', pad_mode='reflection', mode_mask='nearest', align_corners=None) :: RandTransform

Combine and apply affine and coord transforms

Multiply all the matrices returned by aff_fs before doing the corresponding affine transformation on a basic grid corresponding to size, then apply all coord_fs on the resulting flow of coordinates before finally doing an interpolation with mode and pad_mode.

AffineCoordTfm.compose[source]

AffineCoordTfm.compose(tfm)

Compose self with another AffineCoordTfm to only do the interpolation step once

class RandomResizedCropGPU[source]

RandomResizedCropGPU(size, min_scale=0.08, ratio=(0.75, 1.3333333333333333), mode='bilinear', valid_scale=1.0, **kwargs) :: RandTransform

Picks a random scaled crop of an image and resize it to size

# Apply the batch-level random resized crop to 8 copies of the test image
t = _batch_ex(8)
rrc = RandomResizedCropGPU(224, p=1.)
y = rrc(t)
_,axs = plt.subplots(2,4, figsize=(12,6))
# enumerate supplies the batch index, so each axis shows a different item of `y`
# (a bare `for ax in ...` would leave `i` undefined or stale from an earlier cell)
for i,ax in enumerate(axs.flatten()):
    show_image(y[i], ctx=ax)

Flip/Dihedral GPU helpers

affine_mat[source]

affine_mat(*ms)

Restructure length-6 vector ms into an affine matrix with 0,0,1 in the last line

mask_tensor[source]

mask_tensor(x, p=0.5, neutral=0.0, batch=False)

Mask elements of x with neutral with probability 1-p

# Sanity checks for _draw_mask on a dummy batch of 5 items
x = torch.zeros(5,2,3)
# Default draw used below: one random integer in [0, 8) per batch item
def_draw = lambda x: torch.randint(0,8, (x.size(0),))
t = _draw_mask(x, def_draw)
assert (0. <= t).all() and (t <= 7).all() 
# With draw=1 every returned value is asserted to lie in [0, 1]
t = _draw_mask(x, def_draw, 1)
assert (0. <= t).all() and (t <= 1).all() 
# With p=1 the supplied draw (a scalar or a per-item list) is expected back for every item
test_eq(_draw_mask(x, def_draw, 1, p=1), tensor([1.,1,1,1,1]))
test_eq(_draw_mask(x, def_draw, [0,1,2,3,4], p=1), tensor([0.,1,2,3,4]))
# With batch=True all items are expected to share one outcome: all zeros or all ones
for i in range(5):
    t = _draw_mask(x, def_draw, 1, batch=True)
    assert (t==torch.zeros(5)).all() or (t==torch.ones(5)).all()

flip_mat[source]

flip_mat(x, p=0.5, draw=None, batch=False)

Return a random flip matrix

# Over 100 draws, the [0,0] entry of the returned matrices should take both values -1 and 1
x = flip_mat(torch.randn(100,4,3))
test_eq(set(x[:,0,0].numpy()), {-1,1}) #might fail with probability 2*2**(-100) (picked only 1s or -1s)

TensorImage.flip_batch[source]

TensorImage.flip_batch(x:(TensorImage, TensorMask, TensorPoint, TensorBBox), p=0.5, draw=None, size=None, mode=None, pad_mode=None, align_corners=True, batch=False)

TensorMask.flip_batch[source]

TensorMask.flip_batch(x:(TensorImage, TensorMask, TensorPoint, TensorBBox), p=0.5, draw=None, size=None, mode=None, pad_mode=None, align_corners=True, batch=False)

TensorPoint.flip_batch[source]

TensorPoint.flip_batch(x:(TensorImage, TensorMask, TensorPoint, TensorBBox), p=0.5, draw=None, size=None, mode=None, pad_mode=None, align_corners=True, batch=False)

TensorBBox.flip_batch[source]

TensorBBox.flip_batch(x:(TensorImage, TensorMask, TensorPoint, TensorBBox), p=0.5, draw=None, size=None, mode=None, pad_mode=None, align_corners=True, batch=False)

# Check flip_batch(p=1.) on an image, a set of points and a bounding box against hand-written expected values
t = _pnt2tensor([[1,0], [2,1]], (3,3))
y = TensorImage(t[None,None]).flip_batch(p=1.)
test_eq(y, _pnt2tensor([[1,0], [0,1]], (3,3))[None,None])

# Same check on points; [None] adds the leading batch axis
pnts = TensorPoint((tensor([[1.,0.], [2,1]]) -1)[None])
test_eq(pnts.flip_batch(p=1.), tensor([[[1.,0.], [0,1]]]) -1)

# Same check on a bounding box
bbox = TensorBBox(((tensor([[1.,0., 2.,1]]) -1)[None]))
test_eq(bbox.flip_batch(p=1.), tensor([[[0.,0., 1.,1.]]]) -1)

Flip[source]

Flip(p=0.5, draw=None, size=None, mode='bilinear', pad_mode='reflection', align_corners=True, batch=False)

Randomly flip a batch of images with a probability p

# Same inputs and expected values as the flip_batch checks, this time going through the Flip transform
flip = Flip(p=1.)
t = _pnt2tensor([[1,0], [2,1]], (3,3))
y = flip(TensorImage(t[None,None]), split_idx=0)
test_eq(y, _pnt2tensor([[1,0], [0,1]], (3,3))[None,None])

# Points
pnts = TensorPoint((tensor([[1.,0.], [2,1]]) -1)[None])
test_eq(flip(pnts, split_idx=0), tensor([[[1.,0.], [0,1]]]) -1)

# Bounding box
bbox = TensorBBox(((tensor([[1.,0., 2.,1]]) -1)[None]))
test_eq(flip(bbox, split_idx=0), tensor([[[0.,0., 1.,1.]]]) -1)

class DeterministicDraw[source]

DeterministicDraw(vals)

# Successive calls are expected to return vals[0], vals[1], ... for the whole batch, wrapping around after the last value
t =  _batch_ex(8)
draw = DeterministicDraw(list(range(8)))
for i in range(15): test_eq(draw(t), torch.zeros(8)+(i%8))

DeterministicFlip[source]

DeterministicFlip(size=None, mode='bilinear', pad_mode='reflection', align_corners=True)

Flip the batch every other call

# Call the same DeterministicFlip instance eight times and show the first item of each result
t = _batch_ex(8)
dih = DeterministicFlip()
_,axs = plt.subplots(2,4, figsize=(12,6))
for i,ax in enumerate(axs.flatten()):
    y = dih(t)
    show_image(y[0], ctx=ax, title=f'Call {i}')

dihedral_mat[source]

dihedral_mat(x, p=0.5, draw=None, batch=False)

Return a random dihedral matrix

TensorImage.dihedral_batch[source]

TensorImage.dihedral_batch(x:(TensorImage, TensorMask, TensorPoint, TensorBBox), p=0.5, draw=None, size=None, mode=None, pad_mode=None, batch=False, align_corners=True)

TensorMask.dihedral_batch[source]

TensorMask.dihedral_batch(x:(TensorImage, TensorMask, TensorPoint, TensorBBox), p=0.5, draw=None, size=None, mode=None, pad_mode=None, batch=False, align_corners=True)

TensorPoint.dihedral_batch[source]

TensorPoint.dihedral_batch(x:(TensorImage, TensorMask, TensorPoint, TensorBBox), p=0.5, draw=None, size=None, mode=None, pad_mode=None, batch=False, align_corners=True)

TensorBBox.dihedral_batch[source]

TensorBBox.dihedral_batch(x:(TensorImage, TensorMask, TensorPoint, TensorBBox), p=0.5, draw=None, size=None, mode=None, pad_mode=None, batch=False, align_corners=True)

Dihedral[source]

Dihedral(p=0.5, draw=None, size=None, mode='bilinear', pad_mode='reflection', align_corners=None, batch=False)

Apply a random dihedral transformation to a batch of images with a probability p

draw can be specified if you want to customize which flip is picked when the transform is applied (default is a random number between 0 and 7). It can be an integer between 0 and 7, a list of such integers (which then should have a length equal to the size of the batch) or a callable that returns an integer between 0 and 7.

# One dihedral transformation per batch item: draw=list(range(8)) assigns flip k to item k
t = _batch_ex(8)
dih = Dihedral(p=1., draw=list(range(8)))
# NOTE: this result is overwritten on the next line, so the transform call only acts as a smoke test
y = dih(t)
# The images displayed below come from the dihedral_batch method
y = t.dihedral_batch(p=1., draw=list(range(8)))
_,axs = plt.subplots(2,4, figsize=(12,5))
for i,ax in enumerate(axs.flatten()): show_image(y[i], ctx=ax, title=f'Flip {i}')

DeterministicDihedral[source]

DeterministicDihedral(size=None, mode='bilinear', pad_mode='reflection', align_corners=None)

Flip the batch every other call

# Call the same DeterministicDihedral instance eight times and show the first item of each result
t = _batch_ex(8)
dih = DeterministicDihedral()
_,axs = plt.subplots(2,4, figsize=(12,6))
for i,ax in enumerate(axs.flatten()):
    y = dih(t)
    show_image(y[0], ctx=ax, title=f'Call {i}')

rotate_mat[source]

rotate_mat(x, max_deg=10, p=0.5, draw=None, batch=False)

Return a random rotation matrix with max_deg and p

TensorImage.rotate[source]

TensorImage.rotate(x:(TensorImage, TensorMask, TensorPoint, TensorBBox), size=None, mode=None, pad_mode=None, align_corners=True, **kwargs)

TensorMask.rotate[source]

TensorMask.rotate(x:(TensorImage, TensorMask, TensorPoint, TensorBBox), size=None, mode=None, pad_mode=None, align_corners=True, **kwargs)

TensorPoint.rotate[source]

TensorPoint.rotate(x:(TensorImage, TensorMask, TensorPoint, TensorBBox), size=None, mode=None, pad_mode=None, align_corners=True, **kwargs)

TensorBBox.rotate[source]

TensorBBox.rotate(x:(TensorImage, TensorMask, TensorPoint, TensorBBox), size=None, mode=None, pad_mode=None, align_corners=True, **kwargs)

Rotate[source]

Rotate(max_deg=10, p=0.5, draw=None, size=None, mode='bilinear', pad_mode='reflection', align_corners=True, batch=False)

Apply a random rotation of at most max_deg with probability p to a batch of images

draw can be specified if you want to customize which angle is picked when the transform is applied (default is a random float between -max_deg and max_deg). It can be a float, a list of floats (which then should have a length equal to the size of the batch) or a callable that returns a float.

# One fixed angle per batch item, passed through `draw`; p=1. so every item is rotated
thetas = [-30,-15,0,15,30]
y = _batch_ex(5).rotate(draw=thetas, p=1.)
_,axs = plt.subplots(1,5, figsize=(15,3))
for i,ax in enumerate(axs.flatten()):
    show_image(y[i], ctx=ax, title=f'{thetas[i]} degrees')

zoom_mat[source]

zoom_mat(x, max_zoom=1.1, p=0.5, draw=None, draw_x=None, draw_y=None, batch=False)

Return a random zoom matrix with max_zoom and p

TensorImage.zoom[source]

TensorImage.zoom(x:(TensorImage, TensorMask, TensorPoint, TensorBBox), size=None, mode='bilinear', pad_mode='reflection', align_corners=True, **kwargs)

TensorMask.zoom[source]

TensorMask.zoom(x:(TensorImage, TensorMask, TensorPoint, TensorBBox), size=None, mode='bilinear', pad_mode='reflection', align_corners=True, **kwargs)

TensorPoint.zoom[source]

TensorPoint.zoom(x:(TensorImage, TensorMask, TensorPoint, TensorBBox), size=None, mode='bilinear', pad_mode='reflection', align_corners=True, **kwargs)

TensorBBox.zoom[source]

TensorBBox.zoom(x:(TensorImage, TensorMask, TensorPoint, TensorBBox), size=None, mode='bilinear', pad_mode='reflection', align_corners=True, **kwargs)

Zoom[source]

Zoom(max_zoom=1.1, p=0.5, draw=None, draw_x=None, draw_y=None, size=None, mode='bilinear', pad_mode='reflection', batch=False, align_corners=True)

Apply a random zoom of at most max_zoom with probability p to a batch of images

draw, draw_x and draw_y can be specified if you want to customize which scale and center are picked when the transform is applied (default is a random float between 1 and max_zoom for the first, between 0 and 1 for the last two). Each can be a float, a list of floats (which then should have a length equal to the size of the batch) or a callable that returns a float.

draw_x and draw_y are expected to be the position of the center in pct, 0 meaning the most left/top possible and 1 meaning the most right/bottom possible.

# First figure: one scale per batch item, with the center pinned at (0.5, 0.5)
scales = [1., 1.1, 1.25, 1.5]
y = _batch_ex(4).zoom(draw=scales, p=1., draw_x=0.5, draw_y=0.5)
fig,axs = plt.subplots(1,4, figsize=(12,3))
fig.suptitle('Center zoom with different scales')
for i,ax in enumerate(axs.flatten()):
    show_image(y[i], ctx=ax, title=f'scale {scales[i]}')
# Second figure: scale fixed at 1.5, draw_x/draw_y left at their defaults so the center is drawn at random
y = _batch_ex(4).zoom(p=1., draw=1.5)
fig,axs = plt.subplots(1,4, figsize=(12,3))
fig.suptitle('Constant scale and different random centers')
for i,ax in enumerate(axs.flatten()):
    show_image(y[i], ctx=ax)

Warping

find_coeffs[source]

find_coeffs(p1, p2)

Find coefficients for warp tfm from p1 to p2

apply_perspective[source]

apply_perspective(coords, coeffs)

Apply perspective transform on coords with coeffs

TensorImage.warp[source]

TensorImage.warp(x:(TensorImage, TensorMask, TensorPoint, TensorBBox), size=None, mode='bilinear', pad_mode='reflection', align_corners=True, **kwargs)

TensorMask.warp[source]

TensorMask.warp(x:(TensorImage, TensorMask, TensorPoint, TensorBBox), size=None, mode='bilinear', pad_mode='reflection', align_corners=True, **kwargs)

TensorPoint.warp[source]

TensorPoint.warp(x:(TensorImage, TensorMask, TensorPoint, TensorBBox), size=None, mode='bilinear', pad_mode='reflection', align_corners=True, **kwargs)

TensorBBox.warp[source]

TensorBBox.warp(x:(TensorImage, TensorMask, TensorPoint, TensorBBox), size=None, mode='bilinear', pad_mode='reflection', align_corners=True, **kwargs)

Warp[source]

Warp(magnitude=0.2, p=0.5, draw_x=None, draw_y=None, size=None, mode='bilinear', pad_mode='reflection', batch=False, align_corners=True)

Apply perspective warping with magnitude and p on a batch of matrices

draw_x and draw_y can be specified if you want to customize the magnitudes that are picked when the transform is applied (default is a random float between -magnitude and magnitude). Each can be a float, a list of floats (which then should have a length equal to the size of the batch) or a callable that returns a float.

# First figure: vary draw_y per batch item while draw_x is held at 0.
scales = [-0.4, -0.2, 0., 0.2, 0.4]
warp = Warp(p=1., draw_y=scales, draw_x=0.)
y = warp(_batch_ex(5), split_idx=0)
fig,axs = plt.subplots(1,5, figsize=(15,3))
fig.suptitle('Vertical warping')
for i,ax in enumerate(axs.flatten()):
    show_image(y[i], ctx=ax, title=f'magnitude {scales[i]}')
# Second figure: same magnitudes, now on draw_x with draw_y held at 0.
scales = [-0.4, -0.2, 0., 0.2, 0.4]
warp = Warp(p=1., draw_x=scales, draw_y=0.)
y = warp(_batch_ex(5), split_idx=0)
fig,axs = plt.subplots(1,5, figsize=(15,3))
fig.suptitle('Horizontal warping')
for i,ax in enumerate(axs.flatten()):
    show_image(y[i], ctx=ax, title=f'magnitude {scales[i]}')

Lighting transforms

TensorImage.lighting[source]

TensorImage.lighting(x:TensorImage, func)

class LightingTfm[source]

LightingTfm(fs) :: RandTransform

Apply fs to the logits

TensorImage.brightness[source]

TensorImage.brightness(x:TensorImage, **kwargs)

Brightness[source]

Brightness(max_lighting=0.2, p=0.75, draw=None, batch=False)

Apply change in brightness of max_lighting to batch of images with probability p.

draw can be specified if you want to customize the magnitude that is picked when the transform is applied (default is a random float between -0.5*(1-max_lighting) and 0.5*(1+max_lighting)). It can be a float, a list of floats (which then should have a length equal to the size of the batch) or a callable that returns a float.

# One brightness value per batch item, passed through `draw`; p=1. so every item is changed
scales = [0.1, 0.3, 0.5, 0.7, 0.9]
y = _batch_ex(5).brightness(draw=scales, p=1.)
fig,axs = plt.subplots(1,5, figsize=(15,3))
for i,ax in enumerate(axs.flatten()):
    show_image(y[i], ctx=ax, title=f'scale {scales[i]}')

TensorImage.contrast[source]

TensorImage.contrast(x:TensorImage, **kwargs)

Contrast[source]

Contrast(max_lighting=0.2, p=0.75, draw=None, batch=False)

Apply change in contrast of max_lighting to batch of images with probability p.

draw can be specified if you want to customize the magnitude that is picked when the transform is applied (default is a random float taken with the log uniform distribution between (1-max_lighting) and 1/(1-max_lighting)). It can be a float, a list of floats (which then should have a length equal to the size of the batch) or a callable that returns a float.

# One contrast scale per batch item, passed through `draw`; p=1. so every item is changed
scales = [0.65, 0.8, 1., 1.25, 1.55]
y = _batch_ex(5).contrast(p=1., draw=scales)
fig,axs = plt.subplots(1,5, figsize=(15,3))
for i,ax in enumerate(axs.flatten()): show_image(y[i], ctx=ax, title=f'scale {scales[i]}')

RandomErasing

Random Erasing Data Augmentation. This variant, designed by Ross Wightman, is applied to either a batch or single image tensor after it has been normalized.

cutout_gaussian[source]

cutout_gaussian(x, areas)

Replace all areas in x with N(0,1) noise

Since this should be applied after normalization, we'll define a helper to apply a function inside normalization.

norm_apply_denorm[source]

norm_apply_denorm(x, f, nrm)

Normalize x with nrm, then apply f, then denormalize

# Normalization built from the ImageNet statistics, with cuda=False
nrm = Normalize.from_stats(*imagenet_stats, cuda=False)
# Noise out two fixed areas; note this rebinds `f`, which earlier held the RandomCrop example
f = partial(cutout_gaussian, areas=[(100,200,100,200),(200,300,200,300)])
# norm_apply_denorm runs `f` between normalizing and denormalizing; item 0 of the result is shown
show_image(norm_apply_denorm(timg, f, nrm)[0]);

class RandomErasing[source]

RandomErasing(p=0.5, sl=0.0, sh=0.3, min_aspect=0.3, max_count=1) :: RandTransform

Randomly selects a rectangle region in an image and randomizes its pixels.

Args:

  • p: The probability that the Random Erasing operation will be performed
  • sl: Minimum proportion of erased area
  • sh: Maximum proportion of erased area
  • min_aspect: Minimum aspect ratio of erased area
  • max_count: maximum number of erasing blocks per image, area per box is scaled by count
# Training-set behavior (split_idx=0): six independent calls on the single image `timg`
tfm = RandomErasing(p=1., max_count=6)

_,axs = subplots(2,3, figsize=(12,6))
f = partial(tfm, split_idx=0)
for i,ax in enumerate(axs.flatten()): show_image(norm_apply_denorm(timg, f, nrm)[0], ctx=ax)
# Same transform applied once to a whole batch of 6 items
y = _batch_ex(6)
_,axs = plt.subplots(2,3, figsize=(12,6))
y = norm_apply_denorm(y, f, nrm)
for i,ax in enumerate(axs.flatten()): show_image(y[i], ctx=ax)
# Validation-set behavior (split_idx=1) for comparison
tfm = RandomErasing(p=1., max_count=6)

_,axs = subplots(2,3, figsize=(12,6))
f = partial(tfm, split_idx=1)
for i,ax in enumerate(axs.flatten()): show_image(norm_apply_denorm(timg, f, nrm)[0], ctx=ax)

All together

setup_aug_tfms[source]

setup_aug_tfms(tfms)

Go through tfms and combines together affine/coord or lighting transforms

#Affine only
# Two affine transforms are expected to be merged into a single composed transform
tfms = [Rotate(draw=10., p=1), Zoom(draw=1.1, draw_x=0.5, draw_y=0.5, p=1.)]
comp = setup_aug_tfms([Rotate(draw=10., p=1), Zoom(draw=1.1, draw_x=0.5, draw_y=0.5, p=1.)])
test_eq(len(comp), 1)
x = torch.randn(4,3,5,5)
# The composed affine matrix should match the product of the two individual matrices
test_close(comp[0]._get_affine_mat(x)[...,:2],tfms[0]._get_affine_mat(x)[...,:2] @ tfms[1]._get_affine_mat(x)[...,:2])
#We can't test that the output of comp or the composition of tfms on x is the same cause it's not (1 interpol vs 2 sp)
#Affine + lighting
# Expected grouping: one affine/coord transform (3 affine funcs + 1 coord func) and one lighting transform (2 funcs)
tfms = [Rotate(), Zoom(), Warp(), Brightness(), Flip(), Contrast()]
comp = setup_aug_tfms(tfms)
test_eq(len(comp), 2)
test_eq(len(comp[0].aff_fs), 3)
test_eq(len(comp[0].coord_fs), 1)
test_eq(len(comp[1].fs), 2)
#Affine + lighting + others
# NOTE(review): this section is identical to the "Affine + lighting" one above — the list contains
# no "other" transform, so the mixed case is not actually exercised; confirm what was intended.
tfms = [Rotate(), Zoom(), Warp(), Brightness(), Flip(), Contrast()]
comp = setup_aug_tfms(tfms)
test_eq(len(comp), 2)
test_eq(len(comp[0].aff_fs), 3)
test_eq(len(comp[0].coord_fs), 1)
test_eq(len(comp[1].fs), 2)

aug_transforms[source]

aug_transforms(mult=1.0, do_flip=True, flip_vert=False, max_rotate=10.0, max_zoom=1.1, max_lighting=0.2, max_warp=0.2, p_affine=0.75, p_lighting=0.75, xtra_tfms=None, size=None, mode='bilinear', pad_mode='reflection', align_corners=True, batch=False, min_scale=1.0)

Utility func to easily create a list of flip, rotate, zoom, warp, lighting transforms.

Random flip (or dihedral if flip_vert=True) with p=0.5 is added when do_flip=True. With p_affine we apply a random rotation of max_rotate degrees, a random zoom of max_zoom and a perspective warping of max_warp. With p_lighting we apply a change in brightness and contrast of max_lighting. Custom xtra_tfms can be added. size, mode and pad_mode will be used for the interpolation. max_rotate, max_lighting and max_warp are multiplied by mult so you can more easily increase or decrease augmentation with a single parameter.

# Apply every transform returned by aug_transforms in sequence to a batch of 9; only the first 3 items are shown
tfms = aug_transforms(pad_mode='zeros', mult=2, min_scale=0.5)
y = _batch_ex(9)
for t in tfms: y = t(y, split_idx=0)
_,axs = plt.subplots(1,3, figsize=(12,3))
for i,ax in enumerate(axs.flatten()): show_image(y[i], ctx=ax)
# Same pipeline built with batch=True instead of min_scale=0.5
tfms = aug_transforms(pad_mode='zeros', mult=2, batch=True)
y = _batch_ex(9)
for t in tfms: y = t(y, split_idx=0)
_,axs = plt.subplots(1,3, figsize=(12,3))
for i,ax in enumerate(axs.flatten()): show_image(y[i], ctx=ax)

Integration tests

Segmentation

# Segmentation check: the same CAMVID_TINY image/mask pair repeated 10 times, augmented at batch level
camvid = untar_data(URLs.CAMVID_TINY)
fns = get_image_files(camvid/'images')
cam_fn = fns[0]
# Mask path is built from the image name: <stem>_P<suffix> under 'labels'
mask_fn = camvid/'labels'/f'{cam_fn.stem}_P{cam_fn.suffix}'
# The labelling function ignores its argument: every item is `cam_fn`, so the mask is always `mask_fn`
def _cam_lbl(fn): return mask_fn
cam_dsrc = Datasets([cam_fn]*10, [PILImage.create, [_cam_lbl, PILMask.create]])
cam_tdl = TfmdDL(cam_dsrc.train, after_item=ToTensor(),
                 after_batch=[IntToFloatTensor(), *aug_transforms()], bs=9)
cam_tdl.show_batch(max_n=9, vmin=1, vmax=30)

Point targets

# Point-target check: one image repeated 10 times with a fixed set of five points as the target
# NOTE(review): `mnist` is not used below — `mnist_fn` is a relative path that does not go through it; confirm.
mnist = untar_data(URLs.MNIST_TINY)
mnist_fn = 'images/mnist3.png'
pnts = np.array([[0,0], [0,35], [28,0], [28,35], [9, 17]])
# The label ignores the file name and always returns the same points.
# NOTE(review): the `->None` return annotation looks deliberate (presumably it stops the
# transform machinery from re-casting the return value) — confirm before changing it.
def _pnt_lbl(fn)->None: return TensorPoint.create(pnts)
pnt_dsrc = Datasets([mnist_fn]*10, [[PILImage.create, Resize((35,28))], _pnt_lbl])
# max_warp=0 is passed to aug_transforms for this point example
pnt_tdl = TfmdDL(pnt_dsrc.train, after_item=[PointScaler(), ToTensor()],
                 after_batch=[IntToFloatTensor(), *aug_transforms(max_warp=0)], bs=9)
pnt_tdl.show_batch(max_n=9)

Bounding boxes

# Bounding-box check: one COCO_TINY image (index 2) repeated 10 times with its boxes and labels
coco = untar_data(URLs.COCO_TINY)
images, lbl_bbox = get_annotations(coco/'train.json')
idx=2
# NOTE: this rebinds `bbox`, which earlier on the page held a TensorBBox used by the flip tests
coco_fn,bbox = coco/'train'/images[idx],lbl_bbox[idx]

# Both getters ignore their argument: element 0 of `bbox` is turned into a TensorBBox, element 1 is used as the labels
def _coco_bb(x):  return TensorBBox.create(bbox[0])
def _coco_lbl(x): return bbox[1]
# n_inp=1: only the image is an input, the boxes and labels are targets
coco_dsrc = Datasets([coco_fn]*10, [PILImage.create, [_coco_bb], [_coco_lbl, MultiCategorize(add_na=True)]], n_inp=1)
coco_tdl = TfmdDL(coco_dsrc, bs=9, after_item=[BBoxLabeler(), PointScaler(), ToTensor()],
                  after_batch=[IntToFloatTensor(), *aug_transforms()])

coco_tdl.show_batch(max_n=9)
{% endraw %}